{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Check that the RGI entitites can be attributed to the correct region and sub-region  "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This is the third of a series of 3 similar notebooks. This one checks that both problems have been resolved."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "in_dir = 'RGI62'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import geopandas as gpd\n",
    "import shapely.geometry as shpg\n",
    "import progressbar\n",
    "import os\n",
    "import glob\n",
    "import numpy as np\n",
    "from oggm import utils, cfg\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "in_dir = os.path.abspath(in_dir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The RGI megafile\n",
    "out = []\n",
    "for reg in range(1, 20):\n",
    "    f = list(glob.glob(in_dir + \"/*/{:02d}_*.shp\".format(reg)))\n",
    "    assert len(f) == 1\n",
    "    sh = gpd.read_file(f[0]).set_index('RGIId')\n",
    "    out.append(sh)\n",
    "mdf =  pd.concat(out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add Point geometries for faster checks\n",
    "mdf['points'] = [shpg.Point(lon, lat) for (lon, lat) in zip(mdf.CenLon, mdf.CenLat)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "rgi_reg = gpd.read_file(os.path.join(in_dir, '00_rgi62_regions', '00_rgi62_O1Regions.shp'))\n",
    "rgi_sreg = gpd.read_file(os.path.join(in_dir, '00_rgi62_regions', '00_rgi62_O2Regions.shp'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "mdf['RGI_CODE'] = ['{:02d}-{:02d}'.format(int(d1), int(d2)) for (d1, d2) in zip(mdf.O1Region, mdf.O2Region)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.testing.assert_equal(mdf['O1Region'].unique(), rgi_reg['RGI_CODE'].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": []
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100% (27108 of 27108) |#######| Elapsed Time: 0:00:34 Time:  0:00:34 RGI Reg: 1\n",
      "100% (18855 of 18855) |#######| Elapsed Time: 0:00:14 Time:  0:00:14 RGI Reg: 2\n",
      "100% (4556 of 4556) |#########| Elapsed Time: 0:00:01 Time:  0:00:01 RGI Reg: 3\n",
      "100% (7415 of 7415) |#########| Elapsed Time: 0:00:01 Time:  0:00:01 RGI Reg: 4\n",
      "100% (20261 of 20261) |#######| Elapsed Time: 0:00:06 Time:  0:00:06 RGI Reg: 5\n",
      "100% (568 of 568) |###########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI Reg: 6\n",
      "100% (1615 of 1615) |#########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI Reg: 7\n",
      "100% (3417 of 3417) |#########| Elapsed Time: 0:00:01 Time:  0:00:01 RGI Reg: 8\n",
      "100% (1069 of 1069) |#########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI Reg: 9\n",
      "100% (5151 of 5151) |########| Elapsed Time: 0:00:01 Time:  0:00:01 RGI Reg: 10\n",
      "100% (3927 of 3927) |########| Elapsed Time: 0:00:01 Time:  0:00:01 RGI Reg: 11\n",
      "100% (1888 of 1888) |########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI Reg: 12\n",
      "100% (54429 of 54429) |######| Elapsed Time: 0:00:46 Time:  0:00:46 RGI Reg: 13\n",
      "100% (27988 of 27988) |######| Elapsed Time: 0:00:16 Time:  0:00:16 RGI Reg: 14\n",
      "100% (13119 of 13119) |######| Elapsed Time: 0:00:09 Time:  0:00:09 RGI Reg: 15\n",
      "100% (2939 of 2939) |########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI Reg: 16\n",
      "100% (15908 of 15908) |######| Elapsed Time: 0:00:04 Time:  0:00:04 RGI Reg: 17\n",
      "100% (3537 of 3537) |########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI Reg: 18\n",
      "100% (2752 of 2752) |########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI Reg: 19\n"
     ]
    }
   ],
   "source": [
    "mdf['NOT_IN_REG'] = False\n",
    "mdf['NEW_REG'] = ''\n",
    "for reg in mdf['O1Region'].unique():\n",
    "    sel = rgi_reg[rgi_reg.RGI_CODE == reg]\n",
    "    mdf_sel = mdf.loc[mdf.O1Region == reg]\n",
    "    for rid, p, g in progressbar.progressbar(zip(mdf_sel.index, mdf_sel.points, mdf_sel.geometry), \n",
    "                                             max_value=len(mdf_sel), suffix= ' RGI Reg: ' + reg):\n",
    "        if not np.sum(sel.contains(p)) > 0:\n",
    "            if not np.sum(sel.intersects(g)) > 0:\n",
    "                mdf.loc[rid, 'NOT_IN_REG'] = True\n",
    "                \n",
    "                cc = rgi_reg.loc[rgi_reg.contains(p)]\n",
    "                if len(cc) == 0:\n",
    "                    pass\n",
    "                elif len(cc) == 1:\n",
    "                    mdf.loc[rid, 'NEW_REG'] = cc.iloc[0].RGI_CODE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "misplaced = mdf.loc[mdf.NOT_IN_REG & mdf.NEW_REG]\n",
    "assert len(misplaced) == 0\n",
    "not_ok = mdf.loc[mdf.NOT_IN_REG & (mdf.NEW_REG == '')]\n",
    "assert len(not_ok) == 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100% (5773 of 5773) |####| Elapsed Time: 0:00:01 Time:  0:00:01 RGI SREG: 01-02\n",
      "100% (10552 of 10552) |##| Elapsed Time: 0:00:09 Time:  0:00:09 RGI SREG: 01-06\n",
      "100% (4258 of 4258) |####| Elapsed Time: 0:00:01 Time:  0:00:01 RGI SREG: 01-04\n",
      "100% (616 of 616) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 01-01\n",
      "100% (872 of 872) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 01-03\n",
      "100% (5037 of 5037) |####| Elapsed Time: 0:00:01 Time:  0:00:01 RGI SREG: 01-05\n",
      "100% (3202 of 3202) |####| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 02-04\n",
      "100% (7389 of 7389) |####| Elapsed Time: 0:00:02 Time:  0:00:02 RGI SREG: 02-02\n",
      "100% (5067 of 5067) |####| Elapsed Time: 0:00:03 Time:  0:00:03 RGI SREG: 02-03\n",
      "100% (1235 of 1235) |####| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 02-01\n",
      "100% (1962 of 1962) |####| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 02-05\n",
      "100% (880 of 880) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 03-03\n",
      "100% (627 of 627) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 03-02\n",
      "100% (536 of 536) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 03-05\n",
      "100% (216 of 216) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 03-06\n",
      "100% (2049 of 2049) |####| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 03-01\n",
      "100% (241 of 241) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 03-04\n",
      "100% (7 of 7) |##########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 03-07\n",
      "100% (1108 of 1108) |####| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 04-07\n",
      "100% (71 of 71) |########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 04-02\n",
      "100% (65 of 65) |########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 04-08\n",
      "100% (1645 of 1645) |####| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 04-05\n",
      "100% (2249 of 2249) |####| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 04-06\n",
      "100% (1442 of 1442) |####| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 04-04\n",
      "100% (277 of 277) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 04-03\n",
      "100% (455 of 455) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 04-01\n",
      "100% (103 of 103) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 04-09\n",
      "100% (20261 of 20261) |##| Elapsed Time: 0:00:05 Time:  0:00:05 RGI SREG: 05-01\n",
      "100% (568 of 568) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 06-01\n",
      "100% (1567 of 1567) |####| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 07-01\n",
      "100% (48 of 48) |########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 07-02\n",
      "100% (1837 of 1837) |####| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 08-01\n",
      "100% (365 of 365) |######| Elapsed Time: 0:00:01 Time:  0:00:01 RGI SREG: 08-03\n",
      "100% (1215 of 1215) |####| Elapsed Time: 0:00:06 Time:  0:00:06 RGI SREG: 08-02\n",
      "100% (480 of 480) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 09-02\n",
      "100% (177 of 177) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 09-03\n",
      "100% (412 of 412) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 09-01\n",
      "100% (426 of 426) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 10-03\n",
      "100% (1660 of 1660) |####| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 10-05\n",
      "100% (73 of 73) |########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 10-06\n",
      "100% (161 of 161) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 10-01\n",
      "100% (394 of 394) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 10-02\n",
      "100% (2437 of 2437) |####| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 10-04\n",
      "100% (3892 of 3892) |####| Elapsed Time: 0:00:01 Time:  0:00:01 RGI SREG: 11-01\n",
      "100% (35 of 35) |########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 11-02\n",
      "100% (1637 of 1637) |####| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 12-01\n",
      "100% (251 of 251) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 12-02\n",
      "100% (5397 of 5397) |####| Elapsed Time: 0:00:02 Time:  0:00:02 RGI SREG: 13-05\n",
      "100% (5065 of 5065) |####| Elapsed Time: 0:00:02 Time:  0:00:02 RGI SREG: 13-09\n",
      "100% (9368 of 9368) |####| Elapsed Time: 0:00:06 Time:  0:00:06 RGI SREG: 13-08\n",
      "100% (5227 of 5227) |####| Elapsed Time: 0:00:01 Time:  0:00:01 RGI SREG: 13-04\n",
      "100% (9739 of 9739) |####| Elapsed Time: 0:00:03 Time:  0:00:03 RGI SREG: 13-03\n",
      "100% (10233 of 10233) |##| Elapsed Time: 0:00:04 Time:  0:00:04 RGI SREG: 13-02\n",
      "100% (3151 of 3151) |####| Elapsed Time: 0:00:01 Time:  0:00:01 RGI SREG: 13-01\n",
      "100% (3519 of 3519) |####| Elapsed Time: 0:00:01 Time:  0:00:01 RGI SREG: 13-06\n",
      "100% (2730 of 2730) |####| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 13-07\n",
      "100% (13757 of 13757) |##| Elapsed Time: 0:00:06 Time:  0:00:06 RGI SREG: 14-02\n",
      "100% (9830 of 9830) |####| Elapsed Time: 0:00:06 Time:  0:00:06 RGI SREG: 14-03\n",
      "100% (4401 of 4401) |####| Elapsed Time: 0:00:01 Time:  0:00:01 RGI SREG: 14-01\n",
      "100% (4353 of 4353) |####| Elapsed Time: 0:00:01 Time:  0:00:01 RGI SREG: 15-03\n",
      "100% (4238 of 4238) |####| Elapsed Time: 0:00:02 Time:  0:00:02 RGI SREG: 15-02\n",
      "100% (4528 of 4528) |####| Elapsed Time: 0:00:02 Time:  0:00:02 RGI SREG: 15-01\n",
      "100% (2891 of 2891) |####| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 16-01\n",
      "100% (7 of 7) |##########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 16-02\n",
      "100% (36 of 36) |########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 16-03\n",
      "100% (5 of 5) |##########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 16-04\n",
      "100% (6474 of 6474) |####| Elapsed Time: 0:00:01 Time:  0:00:01 RGI SREG: 17-02\n",
      "100% (9434 of 9434) |####| Elapsed Time: 0:00:02 Time:  0:00:02 RGI SREG: 17-01\n",
      "100% (3537 of 3537) |####| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 18-01\n",
      "100% (667 of 667) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 19-20\n",
      "100% (169 of 169) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 19-21\n",
      "100% (123 of 123) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 19-19\n",
      "100% (121 of 121) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 19-15\n",
      "100% (34 of 34) |########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 19-16\n",
      "100% (20 of 20) |########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 19-18\n",
      "100% (162 of 162) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 19-11\n",
      "100% (73 of 73) |########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 19-13\n",
      "100% (70 of 70) |########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 19-14\n",
      "100% (14 of 14) |########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 19-24\n",
      "100% (5 of 5) |##########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 19-22\n",
      "100% (109 of 109) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 19-17\n",
      "100% (15 of 15) |########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 19-05\n",
      "100% (412 of 412) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 19-02\n",
      "100% (1 of 1) |##########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 19-12\n",
      "100% (27 of 27) |########| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 19-01\n",
      "100% (553 of 553) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 19-03\n",
      "100% (177 of 177) |######| Elapsed Time: 0:00:00 Time:  0:00:00 RGI SREG: 19-04\n"
     ]
    }
   ],
   "source": [
    "mdf['NOT_IN_SREG'] = False\n",
    "mdf['NEW_SREG'] = ''\n",
    "for sreg in mdf['RGI_CODE'].unique():\n",
    "    sel = rgi_sreg[rgi_sreg.RGI_CODE == sreg]\n",
    "    mdf_sel = mdf.loc[mdf.RGI_CODE == sreg]\n",
    "    for rid, p, g in progressbar.progressbar(zip(mdf_sel.index, mdf_sel.points, mdf_sel.geometry), \n",
    "                                             max_value=len(mdf_sel), suffix= ' RGI SREG: ' + sreg):\n",
    "        if not np.sum(sel.contains(p)) > 0:\n",
    "            if not np.sum(sel.intersects(g)) > 0:\n",
    "                mdf.loc[rid, 'NOT_IN_SREG'] = True\n",
    "                \n",
    "                cc = rgi_sreg.loc[rgi_sreg.contains(p)]\n",
    "                if len(cc) == 0:\n",
    "                    pass\n",
    "                elif len(cc) == 1:\n",
    "                    mdf.loc[rid, 'NEW_SREG'] = cc.iloc[0].RGI_CODE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "misplaced = mdf.loc[mdf.NOT_IN_SREG & mdf.NEW_SREG]\n",
    "assert len(misplaced) == 0\n",
    "not_ok = mdf.loc[mdf.NOT_IN_SREG & (mdf.NEW_SREG == '')]\n",
    "assert len(not_ok) == 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
